package com.wind.media;

import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import net.sourceforge.tess4j.util.LoadLibs;

import java.io.File;
import java.util.Objects;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * <p>
 *    Tesseract for Java: OCR (Optical Character Recognition) helper utilities.
 * </p>
 * @author wind
 * @date    2024/12/11 14:40
 * @version v1.0
 */
public class Tess4jUtil {

    /** Reports OCR failures with their cause instead of dumping a stack trace to stderr. */
    private static final Logger LOGGER = Logger.getLogger(Tess4jUtil.class.getName());

    /** Language code used when the caller passes a null or blank language name. */
    private static final String DEFAULT_LANGUAGE = "eng";

    /** File extension of Tesseract trained-data files; not part of the language code. */
    private static final String TRAINED_DATA_SUFFIX = ".traineddata";

    /**
     * Extracts the text contained in an image file.
     *
     * <p>If {@code resource} does not exist on the file system, Tess4J is asked to extract
     * the resource of that name (for example a trained-data directory shipped on the
     * classpath) and the extracted location is used as the data path instead.
     *
     * @param path     path of the image file to recognize; must not be {@code null}
     * @param resource trained-data directory, or the name of the resource to extract when
     *                 no such directory exists; must not be {@code null}
     * @param name     language code such as {@code eng} or {@code chi_sim}; a trailing
     *                 {@code .traineddata} is stripped, and {@code null} or blank selects
     *                 English ({@code eng})
     * @return the recognized text, or {@code null} if recognition failed
     * @throws NullPointerException if {@code path} or {@code resource} is {@code null}
     */
    public static String getText(String path, String resource, String name) {
        // Fail fast, before a Tesseract instance is created, with a message naming the argument.
        Objects.requireNonNull(path, "path");
        Objects.requireNonNull(resource, "resource");

        // Tesseract is the JNA interface-mapping implementation of ITesseract.
        ITesseract instance = new Tesseract();
        File imageFile = new File(path);

        // Prefer a trained-data directory that already exists on disk; otherwise let
        // Tess4J extract the named resource and use the extracted copy.
        File dataPath = new File(resource);
        if (!dataPath.exists()) {
            dataPath = LoadLibs.extractTessResources(resource);
        }
        instance.setDatapath(dataPath.getAbsolutePath());
        instance.setLanguage(normalizeLanguage(name));
        return getText(instance, imageFile);
    }

    /**
     * Converts a caller-supplied language name into the code expected by Tesseract.
     *
     * @param name language code or trained-data file name; may be {@code null}
     * @return the trimmed code without a {@code .traineddata} suffix, or the default
     *         language when nothing usable was supplied
     */
    private static String normalizeLanguage(String name) {
        if (name == null) {
            return DEFAULT_LANGUAGE;
        }
        String language = name.trim();
        if (language.endsWith(TRAINED_DATA_SUFFIX)) {
            // Callers sometimes pass the file name ("eng.traineddata") rather than the code.
            language = language.substring(0, language.length() - TRAINED_DATA_SUFFIX.length());
        }
        return language.isEmpty() ? DEFAULT_LANGUAGE : language;
    }

    /**
     * Runs OCR on an image file with an already configured engine.
     *
     * @param instance  configured OCR engine
     * @param imageFile image to recognize
     * @return the recognized text, or {@code null} if the engine threw a
     *         {@link TesseractException}
     */
    private static String getText(ITesseract instance, File imageFile) {
        try {
            return instance.doOCR(imageFile);
        } catch (TesseractException e) {
            // Best effort: keep the null-on-failure contract, but record the cause and the file.
            LOGGER.log(Level.WARNING, "OCR failed for image " + imageFile, e);
            return null;
        }
    }
}
